# -*- coding: utf-8 -*-
import fileinput

import scrapy
from search import settings

from urllib.parse import quote
import urllib

from sqlalchemy import null


class BaiduSpider(scrapy.Spider):
    """Query Baidu for every keyword listed in ``01.txt`` (one per line)
    and append each keyword whose reported result count is >= 1000 to
    ``结果.txt``.
    """
    name = 'baidu'
    allowed_domains = []
    start_urls = []

    def __init__(self, *args, **kwargs):
        # Initialize the scrapy Spider base class so its bookkeeping
        # (spider name, custom settings, logger) is set up correctly.
        super().__init__(*args, **kwargs)
        # Build one search URL per keyword. strip() removes the trailing
        # newline that would otherwise be embedded in the URL; quote()
        # percent-encodes non-ASCII (e.g. Chinese) keywords.
        with open("01.txt", "r", encoding="utf-8") as keyword_file:
            for line in keyword_file:
                keyword = line.strip()
                if keyword:
                    self.start_urls.append(
                        "https://www.baidu.com/s?wd=" + quote(keyword))

    # Issue each start URL with a fixed header set (UA from project
    # settings, Host, session Cookie) so Baidu serves the normal,
    # non-blocked result page.
    def start_requests(self):
        for url in self.start_urls:
            headers = {
                'User-Agent': settings.USER_AGENT,
                'Host': 'www.baidu.com',
                # NOTE(review): hard-coded session cookie — will expire;
                # consider moving it to settings.
                'Cookie': 'BAIDUID=B019883A489557949687FCD6626D7F19:FG=1; BIDUPSID=B019883A489557949687FCD6626D7F19; PSTM=1592730823; BD_UPN=12314753; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_WISE_SIDS=148077_148178_149981_150073_147087_150087_148194_148867_150794_148713_150744_147280_150165_149586_149540_150154_148754_147889_148524_151033_127969_149571_149907_146550_150563_149718_146652_150346_146732_149557_145987_131423_100805_147528_150000_107313_148186_147717_149251_150909_140311_144966_149279_149771_148750_147546_148868_150377_110085; BDUSS_BFESS=RhT2FHNH5zd08zVzZ4YXN5dWxlOFhUUmpleHk3MGczZFdqR2w0Mm0tWkg1eXRmSVFBQUFBJCQAAAAAAAAAAAEAAABNDjuaze3To2lpAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEdaBF9HWgRfaU; B64_BOT=1; BD_HOME=1; delPer=0; BD_CK_SAM=1; shifen[178598631444_36807]=1594270315; shifen[179492113431_64893]=1594270344; shifen[162086346330_81195]=1594270347; shifen[179024552069_6064]=1594270351; H_PS_PSSID=32096_1447_31326_32139_32046_32230_32144_32259_31640; shifen[100824232723_90641]=1594270369; BCLID=8797288329604936158; BDSFRCVID=BVFOJeC6249GklJr3ixRvGIsReK5WLQTH6ao4QvCttJwyBHLvhuPEG0Pjx8g0Ku-bXLRogKK3gOTH4PF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tb4j_DKbJI_3fP36q4JHhn00DH0jetJyaR3DbCbvWJ5TMCoYhTQVKjtXDUrN-b5xW67NKU3wfCODShPC-tnI2j0r24T8Qb58tgcB2Pn63l02VMOEe-t2ynLVjnrgt4RMW20e0h7mWIbmsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjjC5D5Jbja8ttTnaKC5bL6rSKR7WfJ5RM-uaD5OH-UnLqhTXHmOZ0l8Ktt3VMbnl-RK5MJkT54JAbhj-QHnU3JOmWIQHDnDR-fFMLfuyKlJm0trCbD54KKJxLhCWeIJo5t5NjPIBhUJiB5JMBan7_UJIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC_lDjKhj5cXepJf-K6E2I6X0b6HajrjDnCr2MozXUI8LNDH3x5HKmonQDOVMUn-spTR0xORWjcy-RO7ttoyWeonWnDX5hcEJJvh0lofDUL1Db3yW6vMtg3t3qoGQpooepvoDPJc3Mv30-jdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEtJKO_CP-JK83fRTxq46fDjOQqxby26naann9aJ5nJD_benoG0pQKDPFbLljxBRJN3IOr-nj4QpP-HJ7kbJjH2tkWMMIq-U5iBDujKl0MLUOYbb0xynoDMtFN0UnMBMPe52OnaIbx3fAKftnOM46JehL3346-35543bRTLnLy5KJtMDF6ejA2jjbQepJf-K6e5D3-0PK8Kb7VbpoKQxnkbfJBDG880-nQWjueB4n4bq3zqUoNyToRXh_7yajK253Ibe7JLpc5Mlo18-Qd5q3pQT8rMlAOK5Oib4jahpR4ab3vOIOTXpO1jh8zBN5thURB2DkO-4bCWJ5TMl5jDh3Mb6ksD-FtqtJHKbDH_D-2txK; COOKIE_SESSION=52_1_6_8_0_6_0_0_5_3_2_0_0_0_0_6_0_1594270370_1594270364%7C9%2352_24_1594270364%7C9; BDUSS=1BMEZPS0kzN0pseFZjU1BrSTdjZ0lyeGkxbi1yUGV-TWt0VDhlV294TUdSaTVmRVFBQUFBJCQAAAAAAAAAAAEAAAC5v0Wa0uDUt9znAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAa5Bl8GuQZfUy; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; PSINO=6; H_PS_645EC=52eatn6PnyCM7OPugGsbKBESQM60acFqvq86ghGkn6BBLcH%2F%2FCRN1NrS2JAdOzVGw2EM; BDSVRTM=617',
            }
            yield scrapy.Request(url, headers=headers)

    def parse(self, response):
        """Extract the result-count span from the result page; when the
        count is >= 1000, record the decoded search keyword.
        """
        print("当前请求头为----------:", response.request.headers['User-Agent'])
        jishu = response.xpath(
            '//*[@id="container"]/div[2]/div/div[2]/span/text()').extract_first('')
        print(jishu)

        # Keep only the digits from text like "百度为您找到相关结果约1,230个".
        # extract_first('') yields '' on no match, so an empty `count`
        # correctly skips the page (the old `jishu != null` comparison
        # against sqlalchemy.null was always true).
        count = ''.join(ch for ch in jishu if ch.isdigit())
        if count and int(count) >= 1000:
            url = response.request.url
            print('当前搜索链接' + url)

            # The keyword is the value of the `wd=` query parameter.
            keyword = urllib.parse.unquote(url.split('=')[1])
            print('搜索词:' + keyword)
            print('搜索量为:' + count)
            with open("结果.txt", "a", encoding="utf-8") as fo:
                fo.write(keyword + "\n")
